{
 "metadata": {
  "name": "",
  "signature": "sha256:228a8be585b4253aecc7019b23371e373fe182619107187072f8e45cc0eb74cd"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "from twitterSentiment import models\n",
      "import nltk\n",
      "import re"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Print how many TwitterText objects exist (count() on the model's manager).\n",
      "print(models.TwitterText.objects.count())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "20690\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Select every TwitterText object; presumably a lazy Django QuerySet -- confirm.\n",
      "tweets = models.TwitterText.objects.all()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from django_pandas.io import read_frame   # make sure to install django-pandas: https://github.com/chrisdev/django-pandas\n",
      "# Load the `tweets` selection into a pandas DataFrame for the text processing below.\n",
      "df = read_frame(tweets)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 60
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def remove_urls(text):\n",
      "    \"\"\"Strip URLs and @username mentions from a tweet.\n",
      "\n",
      "    Args:\n",
      "        text: Raw tweet text (str).\n",
      "\n",
      "    Returns:\n",
      "        A copy of ``text`` in which every http:// or https:// URL and every\n",
      "        @mention has been replaced by the empty string.\n",
      "    \"\"\"\n",
      "    # The trailing '+' repeats the URL character group. Without it only\n",
      "    # 'http://' plus one character is removed, leaving fragments such as\n",
      "    # '.co/abc' in the text.\n",
      "    matched_url = r\"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\"\n",
      "    matched_username = r\"@+[\\w_\\:]+\"\n",
      "    return re.sub(matched_url + \"|\" + matched_username, \"\", text)\n",
      "\n",
      "# Store the result of remove_urls for every tweet in a new column.\n",
      "df['tweets_filtered'] = df['twitter_text'].apply(remove_urls)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 58
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Count how often each filtered tweet string occurs and show the 30 most frequent.\n",
      "# NOTE(review): FreqDist receives whole tweet strings, so despite the name\n",
      "# `commonwords` this ranks repeated tweets, not individual words -- confirm intent.\n",
      "tweetlist=list(df['tweets_filtered'])\n",
      "print (len(tweetlist))\n",
      "commonwords=nltk.FreqDist(tweetlist)\n",
      "print (commonwords.most_common(30))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "20690\n",
        "[('RT  December Portfolio Review with Adjustments to $USO, $SQQQ, $RIG, $AAPL, $ARO, $DXD, $USO, $BTU and $VLO -- .co/\u2026', 531), ('RT  Stock Analyst Finds Best Options Strategies $AAPL $BABA $AMZN $FNMA .co/bI14g1rdlT', 342), ('RT  Stock Analyst Issues New Reports and Opinions on Top Stocks $PLKD $STTK $FVRG $CLF $BABA $AAPL .co/HJAg43ggPM', 168), ('RT  Stock Analyst Finds Best Options Strategies $AAPL $BABA $AMZN $FNMA .co/5mkb4YwJbk', 167), ('RT  $wag .co/DkWTsMPJIc', 98), ('RT  Stock Analyst Issues New Reports and Opinions on Top Stocks $PLKD $STTK $FVRG $CLF $BABA $AAPL .co/Hj79m7RvOT', 36), (' Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? Check it out 10,000 + Views', 22), ('RT  The US owns 261,498,926.230 oz. of gold valued at  $312 billion, about the market cap of Johnson &amp; Johnson. $JNJ\\nhttp:/\u2026', 19), ('RT  A couple hours of DETAILED secrets to making BIG MONEY on $AAPL #trades over and over again! .co/NXm9DOrH7X', 14), ('RT  .co/nmJIt67zoC $dkts $stz $deo. #tequila #drinktopshelf #ComingSoon   #subpenny  #.005 imo', 13), ('#Nasdaq100 #recent #market #exit #2: Covered $YHOO short for a 0.79% #gain in 11 days. #YHOO #forex #trading #stocks', 11), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/lbZzSo6C44 TASR $EOG $XLE $JNUG $DG $ERX', 11), ('Tip #4 How to identify a HOT SECTOR? .co/cFuOlMXLZp #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 11), ('RT  #ASH14 $BMY $MRK - Blood paper on 9p24.1 &amp; JAK2 amplification \u2192 PD-1 ligand \u2191expression in HL\\n.co/pVaqKFoQIp http:\u2026', 11), ('  Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? Check it out 10,000 + Views', 10), ('RT  Movers and Shakers: Making Range Advances: $ISNS $BTU $AMBA $ZION $BKW $GILD $STI $RF $FRAN $SIMG $ARRY $C $SWHC $VIAB \u2026', 10), ('Tip #4 How to identify a HOT SECTOR? 
.co/0CrdLCphOY #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 9), ('RT  Apple among 5 big IBD 50 winners from much-cheaper oil .co/lCy6WHi00o  $AAPL $SAVE $CMG .co/I5fWx6wdpt', 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/lbZzSo6C44 $AVGO $UNH $TLM $MPEL $UA', 9), ('RT  Why Ford shares are poised for a Santa Claus rally of their own: .co/bEsgD32JLk $F .co/fS1jDJwIGw', 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/Fv2ZjiN52T TASR $EOG $XLE $JNUG $DG $ERX', 9), ('RT  #imagine \"Do I have enough $wag for you Y/N?\" Calum sends you. .co/Wx7HFHDDaM', 9), (' Yung Fokiss Barz - Official Music Video $GM .co/AFUcoZ416E ? 10,000 + Views', 9), ('.co/OM9hSaF2XI &lt;-- 2 stocks trading books: 1 entertaining and 1 educational! Both awesome! $TLT $SYNA $AAPL $FEYE $FB $BAC', 9), ('[Case Study] How A 24 yr old student Made $0.48 Million In 8 Months With Stocks .co/Ek8S5gUhfU $AGIO $CVX $FCAU $PKI $UAL', 9), (\"RT  Our  has begun his predictions for 2015. Here's Part 1: .co/gsCkTzL9Vu $FB $TWTR $NFLX $AAPL $GOOG $Y\u2026\", 9), ('[Case Study] How A 24 yr old student Made &gt;$0.5 Million In 8 Months With Stocks .co/Ek8S5gUhfU $AVGO $UNH $TLM $MPEL $UA', 8), ('Video: #Trade $AAPL #FREE! 20141205 .co/tNHGyKVw0i #trading #startups #stocks #money #investor #angel #investoreducation #capital', 8), ('RT  $BBRY here is the 60 minutes VIDEO Disrupting Cancer - CBS News .co/Gu81E5V9Ik', 8), ('Tip #4 How to identify a HOT SECTOR? .co/ewe2BEND4R #stocks #trading $DGLY $GILD $UGAZ $BIIB $MA', 8)]\n"
       ]
      }
     ],
     "prompt_number": 59
    }
   ],
   "metadata": {}
  }
 ]
}